#####
# Code for Creating Analysis II Measures in:
# "How partisan affect shapes citizens' perception of the political world"
# Electoral Studies 60 (2019) 102045
# Dalston G. Ward and Margit Tavits
#####

# Paper models run under R 3.4.3; Replication file written under  R 3.6.1

library(data.table) #version 1.12.2
library(lme4) # version 1.1-21

setwd("<<set as appropriate>>")

# Individual-level data. The file also carries the extra columns that are
# needed only to build the measures below and are otherwise unused in the
# analysis. Column C1005 is read as character.
cses_r <- fread(
  "ind_dat_extend.csv",
  colClasses = c(C1005 = "character")
)

# Note: This file provides the code we used to create our measures that required some form of aggregating multiple survey responses.
# We include checks of whether the code in this document exactly recreates each variable as provided in the replication data.

#####
# Create controls
#####

# Knowledge: sum of the three items Q1, Q2 and Q3.
cses_r[, knowledge2 := Q1 + Q2 + Q3]
# equivalence test with the included version
all.equal(cses_r[["knowledge"]], cses_r[["knowledge2"]])

# Self extremism: absolute distance of the respondent's self-placement from 5.
cses_r[, self_extremism2 := abs(Respondent_selfPlacement - 5)]
# equivalence test with the included version
all.equal(cses_r[["self_extremism"]], cses_r[["self_extremism2"]])


#####
# Polarization measures used in main analyses
#####

# Column names for the nine party slots (A through I).
LR_median_cols <- paste0("LR_median_", LETTERS[1:9])
place_cols <- paste0("C3011_", LETTERS[1:9])
affect_cols <- paste0("C3009_", LETTERS[1:9])

# Before calculating polarization measures, show how the median values were
# calculated: the per-C1004 median of each placement column, ignoring NAs.
test_medians <- cses_r[
  ,
  lapply(.SD, median, na.rm = TRUE),
  by = C1004,
  .SDcols = place_cols
]
# The medians shipped with the data, collapsed to their unique values per C1004.
provided_medians <- cses_r[
  ,
  lapply(.SD, unique),
  by = C1004,
  .SDcols = LR_median_cols
]

# Test for equivalence. Attributes are ignored so that the differing column
# names are not reported as a mismatch.
all.equal(test_medians, provided_medians, check.attributes = FALSE)
rm(test_medians, provided_medians)

# Each measure below is the row-wise standard deviation (NAs dropped) across
# the nine party-slot columns of the relevant type.

# Part A: System Polarization (SD of the LR_median_* columns)
cses_r[
  ,
  system_polarization_SD2 := apply(.SD, 1, stats::sd, na.rm = TRUE),
  .SDcols = LR_median_cols
]

# equivalence test with the included version
all.equal(
  cses_r[["system_polarization_SD"]],
  cses_r[["system_polarization_SD2"]]
)

# Part B: Affective Polarization (SD of the C3009_* columns)
cses_r[
  ,
  affective_polarization_SD2 := apply(.SD, 1, stats::sd, na.rm = TRUE),
  .SDcols = affect_cols
]

# equivalence test with the included version
all.equal(
  cses_r[["affective_polarization_SD"]],
  cses_r[["affective_polarization_SD2"]]
)

# Part C: Ideological Polarization (SD of the C3011_* columns)
cses_r[
  ,
  ideological_polarization_SD2 := apply(.SD, 1, stats::sd, na.rm = TRUE),
  .SDcols = place_cols
]

# equivalence test with the included version
all.equal(
  cses_r[["ideological_polarization_SD"]],
  cses_r[["ideological_polarization_SD2"]]
)

#####
# Polarization measures dropping parties mentioned in rv/pv/nv (for Model 1, Table SI.2.8)
#####

# Step 1: create a version of the placement columns (C3011_A .. C3011_I) with
# NA for parties mentioned in rv/pv/nv. A party counts as mentioned when its
# letter appears anywhere in the respondent's rv, pv or nv entry.
for (party_letter in LETTERS[1:9]) {
  placement_col <- paste0("C3011_", party_letter)
  was_mentioned <- grepl(party_letter, cses_r[["rv"]]) |
    grepl(party_letter, cses_r[["pv"]]) |
    grepl(party_letter, cses_r[["nv"]])
  set(
    cses_r,
    j = paste0(placement_col, "_vc"),
    value = ifelse(was_mentioned, NA, cses_r[[placement_col]])
  )
}
rm(party_letter, placement_col, was_mentioned)

# Step 2: recalculate polarization (row-wise SD, NAs dropped) from the
# masked columns.
cses_r[
  ,
  ideological_polarization_SD_vc2 := apply(.SD, 1, stats::sd, na.rm = TRUE),
  .SDcols = paste0("C3011_", LETTERS[1:9], "_vc")
]

# Step 3: turn into NA those countries that don't include some of the vote
# choice items.
cses_r[
  C1004 %in% c("CAN_2008", "CHL_2009", "FRA_2007", "NOR_2005",
               "KOR_2008", "TWN_2008", "USA_2008", "PER_2011"),
  ideological_polarization_SD_vc2 := NA
]

# equivalence test with the included version
all.equal(
  cses_r[["ideological_polarization_SD_vc"]],
  cses_r[["ideological_polarization_SD_vc2"]]
)

# Same three steps for affective polarization, using the like/dislike columns
# (C3009_A .. C3009_I).

# Mask a party's rating with NA when its letter appears in the respondent's
# rv, pv or nv entry.
for (party_letter in LETTERS[1:9]) {
  rating_col <- paste0("C3009_", party_letter)
  was_mentioned <- grepl(party_letter, cses_r[["rv"]]) |
    grepl(party_letter, cses_r[["pv"]]) |
    grepl(party_letter, cses_r[["nv"]])
  set(
    cses_r,
    j = paste0(rating_col, "_vc"),
    value = ifelse(was_mentioned, NA, cses_r[[rating_col]])
  )
}
rm(party_letter, rating_col, was_mentioned)

# Row-wise SD (NAs dropped) of the masked rating columns.
cses_r[
  ,
  affective_polarization_SD_vc2 := apply(.SD, 1, stats::sd, na.rm = TRUE),
  .SDcols = paste0("C3009_", LETTERS[1:9], "_vc")
]

# turn into NA those countries that don't include some of the vote choice items
cses_r[
  C1004 %in% c("CAN_2008", "CHL_2009", "FRA_2007", "NOR_2005",
               "KOR_2008", "TWN_2008", "USA_2008", "PER_2011"),
  affective_polarization_SD_vc2 := NA
]

# equivalence test with the included version
all.equal(
  cses_r[["affective_polarization_SD_vc"]],
  cses_r[["affective_polarization_SD_vc2"]]
)

#####
# Lachat versions of polarization
#####

# Note: Respondents for the 2007 election in Japan have NA for all of the Lachat and Lupu measures. 
# This occurred because the CSES for this year covered only the upper-house election and the vote share measures
# used to construct these measures were coded to include either lower-house results (when available) or presidential results.

# Vote-share-weighted variance of party positions for a single row.
#
# Args:
#   row:       numeric vector laid out as `n_parties` party positions followed
#              by the `n_parties` matching vote shares (in percent).
#   n_parties: number of party slots in each half of `row`. Defaults to 9,
#              the layout used throughout this file.
#
# Returns:
#   A single numeric value, sum(w * (p - weighted.mean(p, w))^2), where p are
#   the positions and w the vote shares of the parties that have BOTH a
#   position and a vote share, with w rescaled to sum to one.
#   NA_real_ when no party has both a position and a vote share.
weighted_var_CSES <- function(row, n_parties = 9L) {
  slots <- seq_len(n_parties)
  parties <- row[slots]
  votes <- 0.01 * row[n_parties + slots]

  # Only parties with both pieces of information can contribute.
  usable <- !is.na(parties) & !is.na(votes)
  if (!any(usable)) {
    return(NA_real_)
  }

  p <- parties[usable]
  # Renormalise so the weights of the usable parties sum to one.
  w <- votes[usable] / sum(votes[usable])

  sum(w * (p - weighted.mean(p, w))^2)
}

# Vote-share columns for the nine party slots.
vs_cols <- paste0("VS_", LETTERS[1:9])

# Apply weighted_var_CSES row by row. Each row passed to it holds the nine
# position columns first and the nine vote-share columns second.
cses_r[
  ,
  system_polarization_Lachat2 := apply(.SD, 1, weighted_var_CSES),
  .SDcols = c(LR_median_cols, vs_cols)
]
cses_r[
  ,
  affective_polarization_Lachat2 := apply(.SD, 1, weighted_var_CSES),
  .SDcols = c(affect_cols, vs_cols)
]
cses_r[
  ,
  ideological_polarization_Lachat2 := apply(.SD, 1, weighted_var_CSES),
  .SDcols = c(place_cols, vs_cols)
]

# equivalence tests with the included versions
cses_r[, list(
  all.equal(system_polarization_Lachat, system_polarization_Lachat2),
  all.equal(affective_polarization_Lachat, affective_polarization_Lachat2),
  all.equal(ideological_polarization_Lachat, ideological_polarization_Lachat2)
)]

#####
# Lupu versions of polarization
#####

# Vote-share-weighted pairwise-distance polarization for a single row.
#
# Args:
#   row:       numeric vector laid out as `n_parties` party placements
#              followed by the `n_parties` matching vote shares (in percent).
#   n_parties: number of party slots in each half of `row`. Defaults to 9,
#              the layout used throughout this file.
#
# Returns:
#   With m parties that have both a placement and a vote share, and vote
#   shares w rescaled to sum to one over those parties:
#     m == 0 -> NA_real_
#     m == 1 -> 0
#     m >= 2 -> sum over all unordered pairs (i, j) of
#               abs(p_i - p_j) * ((w_i + w_j) / (m - 1))
polar_lupu <- function(row, n_parties = 9L) {
  slots <- seq_len(n_parties)
  placements <- row[slots]
  shares <- 0.01 * row[n_parties + slots]

  # Only parties with both a placement and a vote share enter the measure.
  usable <- !is.na(placements) & !is.na(shares)
  m <- sum(usable)

  if (m == 0L) {
    return(NA_real_)
  }
  if (m == 1L) {
    return(0)
  }

  placed <- placements[usable]
  weights <- shares[usable] / sum(shares[usable])

  # Index pairs instead of building per-row data frames; combn(m, 2)
  # enumerates the pairs of 1..m, one pair per column.
  pair_idx <- combn(m, 2)
  first <- pair_idx[1, ]
  second <- pair_idx[2, ]

  sum(
    abs(placed[first] - placed[second]) *
      ((weights[first] + weights[second]) / (m - 1))
  )
}

# Apply polar_lupu row by row. Each row passed to it holds the nine
# placement/rating columns first and the nine vote-share columns second.
cses_r[
  ,
  ideological_polarization_lupuW2 := apply(.SD, 1, polar_lupu),
  .SDcols = c(place_cols, vs_cols)
]
cses_r[
  ,
  affective_polarization_lupuW2 := apply(.SD, 1, polar_lupu),
  .SDcols = c(affect_cols, vs_cols)
]

# equivalence tests with the included versions
cses_r[, list(
  all.equal(ideological_polarization_lupuW, ideological_polarization_lupuW2),
  all.equal(affective_polarization_lupuW, affective_polarization_lupuW2)
)]

#####
# "national_vote_covered" measure
#####

# Total vote share covered: row-wise sum of the nine VS_* columns, NAs dropped.
set(
  cses_r,
  j = "national_vote_covered2",
  value = rowSums(cses_r[, vs_cols, with = FALSE], na.rm = TRUE)
)

# There are three differences between national_vote_covered2 and
# national_vote_covered as included in the replication data.

# First, the provided version includes vote shares for the upper house in
# Japan. The VS columns included in this data (and used for the Lachat and
# Lupu measures) only cover lower house (when available) or presidential
# (if available and lower house not available) results.
# Hence the version created by the above code has "0" instead of the correct
# total.

# Second, the provided version corrects for over-counting of vote shares in
# the 2006 Mexican election. Specifically, parties that ran together but were
# asked about separately on the CSES were given the same vote share.
# The code below corrects this:
cses_r[
  C1004 == "MEX_2006",
  national_vote_covered2 := national_vote_covered2 - 28.99 * 2 - 28.21
]

# Third, the provided version uses vote totals from the Philippines 2010
# presidential election. The version created by the above code instead uses
# lower house results.

# After dropping Japan and the Philippines, no differences remain.
cses_r[
  !(C1004 %in% c("JPN_2007", "PHL_2010")),
  all.equal(national_vote_covered, national_vote_covered2)
]

# At the end of this document, we provide robustness tests that show
# correcting these three issues with the provided vote share variable 
# before calculating the Lachat or Lupu measures does not affect the results.

#####
# "No Mentioned Parties" for Table SI.2.8, Model 2
#####

# Treat empty strings in rv, pv and nv as missing.
cses_r[rv == "", rv := NA]
cses_r[pv == "", pv := NA]
cses_r[nv == "", nv := NA]

# For respondents with all three of rv, pv and nv present, flag those whose
# entries contain none of the party letters A through I. Rows with any of the
# three missing are left untouched (NA in the new column).
cses_r[
  !(is.na(rv) | is.na(pv) | is.na(nv)),
  no_mentioned2 := !(
    grepl("A|B|C|D|E|F|G|H|I", rv) |
      grepl("A|B|C|D|E|F|G|H|I", pv) |
      grepl("A|B|C|D|E|F|G|H|I", nv)
  )
]

# equivalence test with the included version
all.equal(cses_r[["no_mentioned"]], cses_r[["no_mentioned2"]])

#####
# Extreme Response Style (ERS) items
#####

# Note: we used the following measures for the baseline extreme_prop item:
# (the values considered as extreme by us are in parentheses)
# C3009_A through C3009_I (the like/dislike items, 0 and 10)
# C3011_A through C3011_I (the left-right items, 0 and 10)
# C3013 (left-right self, 0 and 10)
# C3004 ("who is in power makes a difference", 1 and 5)
# C3005 ("who you vote for makes a difference", 1 and 5)
# C3006 (government performance, 1 and 4)
# C3017 (difference of choice options, 1 and 3)
# C3018 (involvement in the campaign, 1 and 4)
# C3019 (satisfaction with democracy, 1 and 5)

# Flag respondents whose extreme_prop is at or above the 90th / 75th
# percentile. Both the quantiles and the flags are computed within the
# estimation sample only, so that the cut-offs are the right ones; rows
# outside the sample keep NA.
cses_r[
  in_est_sample == TRUE,
  ERS_902 := extreme_prop >= quantile(extreme_prop, 0.9, na.rm = TRUE)
]
cses_r[
  in_est_sample == TRUE,
  ERS_752 := extreme_prop >= quantile(extreme_prop, 0.75, na.rm = TRUE)
]

# equivalence tests with the included versions
cses_r[in_est_sample == TRUE, list(
  all.equal(ERS_90, ERS_902),
  all.equal(ERS_75, ERS_752)
)]

#####
# Robustness of Lachat/Lupu results to correcting the VS columns for JPN_2007 and PHL_2010
####

# Overwrite the nine vote-share columns (slots A through I, in order) for the
# two elections being corrected; NA marks slots without a value.

# correct JPN totals
cses_r[
  C1004 == "JPN_2007",
  (vs_cols) := list(40.45, 31.35, 5.96, 8.7, 2.28, NA, NA, NA, NA)
]

# correct PHL totals
cses_r[
  C1004 == "PHL_2010",
  (vs_cols) := list(41.9, 26.8, 15.1, 11.2, 3.1, 1.4, NA, NA, NA)
]

# recreate variables (version 3) from the corrected vote shares
cses_r[
  ,
  system_polarization_Lachat3 := apply(.SD, 1, weighted_var_CSES),
  .SDcols = c(LR_median_cols, vs_cols)
]
cses_r[
  ,
  affective_polarization_Lachat3 := apply(.SD, 1, weighted_var_CSES),
  .SDcols = c(affect_cols, vs_cols)
]
cses_r[
  ,
  ideological_polarization_Lachat3 := apply(.SD, 1, weighted_var_CSES),
  .SDcols = c(place_cols, vs_cols)
]

# refit analyses
# Each model is a linear mixed model with a random intercept per C1004,
# refit on the version-3 Lachat measures built from the corrected vote shares.

# The system-polarization control uses the recomputed
# system_polarization_Lachat3 as well. The originally provided
# system_polarization_Lachat is NA for every JPN_2007 respondent, so
# controlling for it would presumably drop exactly the rows whose vote shares
# were just corrected (lmer's default NA handling omits incomplete rows).
mod_ideol_lachat <- lmer(
  ideological_polarization_Lachat3 ~ (1 | C1004) +
    affective_polarization_Lachat3 + age + gender + education + knowledge +
    system_polarization_Lachat3 + self_extremism,
  data = cses_r
)
summary(mod_ideol_lachat)

mod_vote_lachat <- lmer(
  vote_diff ~ (1 | C1004) + affective_polarization_Lachat3 + age + gender +
    education + knowledge + self_extremism,
  data = cses_r
)
summary(mod_vote_lachat)

mod_power_lachat <- lmer(
  power_diff ~ (1 | C1004) + affective_polarization_Lachat3 + age + gender +
    education + knowledge + self_extremism,
  data = cses_r
)
summary(mod_power_lachat)

mod_turnout_lachat <- lmer(
  turnout ~ (1 | C1004) + affective_polarization_Lachat3 + age + gender +
    education + knowledge + self_extremism,
  data = cses_r
)
summary(mod_turnout_lachat)

# Repeat the robustness exercise for the Lupu measures: rebuild them (version 3)
# from the current vote-share columns, then refit the four models with a
# random intercept per C1004.
cses_r[
  ,
  ideological_polarization_lupuW3 := apply(.SD, 1, polar_lupu),
  .SDcols = c(place_cols, vs_cols)
]
cses_r[
  ,
  affective_polarization_lupuW3 := apply(.SD, 1, polar_lupu),
  .SDcols = c(affect_cols, vs_cols)
]

mod_ideol_lupu <- lmer(
  ideological_polarization_lupuW3 ~ (1 | C1004) +
    affective_polarization_lupuW3 + age + gender + education + knowledge +
    system_polarization_SD + self_extremism,
  data = cses_r
)
summary(mod_ideol_lupu)

mod_vote_lupu <- lmer(
  vote_diff ~ (1 | C1004) + affective_polarization_lupuW3 + age + gender +
    education + knowledge + self_extremism,
  data = cses_r
)
summary(mod_vote_lupu)

mod_power_lupu <- lmer(
  power_diff ~ (1 | C1004) + affective_polarization_lupuW3 + age + gender +
    education + knowledge + self_extremism,
  data = cses_r
)
summary(mod_power_lupu)

mod_turnout_lupu <- lmer(
  turnout ~ (1 | C1004) + affective_polarization_lupuW3 + age + gender +
    education + knowledge + self_extremism,
  data = cses_r
)
summary(mod_turnout_lupu)

